/*******************************************************************************
* Objective: Create "dataset_maps_and_cities.dta"
*******************************************************************************/

version 16

* Establish Working Directory **************************************************
* The global macro workdirectory must be defined by the caller before this
* do-file runs. Every path below is relative to it, so we stop right away when
* it is empty instead of continuing from an unintended directory.
if `"$workdirectory"' == "" {
	display as error "global macro workdirectory is not set; define it before running this do-file"
	exit 198
}
cd "$workdirectory"

* Settings *********************************************************************
capture log close
clear all
set more off

********************************************************************************
**# Step 1: Creating share of subdistricts located within a FUA
********************************************************************************

* Importing intersection of FUAs and subdistricts
* (forward slashes are used in every path so the script also runs on macOS/Linux)
import delimited "raw_datasets/Maps/intersection.csv", encoding(UTF-8) clear
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

* Importing area of subdistricts
* We drop observations without geolevel2 and also observations with
* geolevel2 == 888888888, as this seems to identify bodies of water and is not
* a unique identifier in the sample of subdistricts at world level (it should
* not contain information on labor shares either)
import delimited "raw_datasets/Maps/area_subdist.csv", encoding(UTF-8) clear
keep geolevel2 area_subd
drop if missing(geolevel2)
drop if geolevel2 == 888888888
tempfile area_subdist
save `area_subdist', replace

* Merging and obtaining shares
* Only intersections whose subdistrict has a known area are kept; m:1 requires
* geolevel2 to be unique in the area file.
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
merge m:1 geolevel2 using `area_subdist', keep(match) nogenerate

* share = area of the intersection over the total area of the subdistrict
gen share = area_calcu/area_subd
sort geolevel2
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace


********************************************************************************
**# Step 2: Generate industry per city data
********************************************************************************

* Generating empty file to attach data to
clear all
tempfile data_indgen_cities
save `data_indgen_cities', emptyok replace

* Generating local with the names of all .csv files in the folder
local files : dir "raw_datasets/Maps/indgen_maps" files "*.csv"

* NOTE: the working directory changes here. The country extracts loaded later
* in this step are opened relative to this folder, and the script returns to
* the project root once the step is finished.
cd "raw_datasets/Maps/indgen_maps"

* Importing, appending and saving
* Variable names are read from row 11 and data starts at row 12 of each file.
* The file name is quoted so that names containing spaces are read correctly;
* country_year is the file name without its .csv extension.
foreach fi of local files {
  import delimited "`fi'", varnames(11) rowrange(12) encoding(UTF-8) clear
  gen country_year = subinstr("`fi'",".csv","",.)
  append using `data_indgen_cities'
  save `data_indgen_cities', replace
}

* Cleaning: only the "Weighted N" rows are used, and the column-total row is
* discarded
rename v2 census
keep if v1 == "Weighted N" & census != "COL TOTAL"
drop v1

* Countries with more than 1 country_year per year: the letter that follows
* the country name is removed so that all pieces share one country_year
foreach suffix in a b {
	replace country_year = "vietnam_2009" if country_year == "vietnam`suffix'_2009"
}

foreach suffix in a b c {
	replace country_year = "philippines_1990" if country_year == "philippines`suffix'_1990"
}

foreach yr in 1980 1991 2000 2010 {
	foreach suffix in a b c d e f {
		replace country_year = "brasil_`yr'" if country_year == "brasil`suffix'_`yr'"
	}
}

* Extracting geolevel2: first run of digits found in the census label,
* converted to a numeric variable
gen geolevel2 = regexs(0) if regexm(census, "[0-9]+")
destring geolevel2, replace

save `data_indgen_cities', replace

*-------------------------------------------------------------------------------
* Adding Mexico

* Loading the Mexico extract (opened relative to the current directory)
use ipums_mexico, clear

* Dropping unused variables and records without industry, then summing person
* weights by year, municipality and industry
drop country sample serial persons hhwt urban pernum age
drop if indgen == .
sort year geolev2
collapse (sum) perwt, by(year geolev2 indgen)

* One row per year-municipality, one perwt column per industry code
reshape wide perwt, i(year geolev2) j(indgen)

* country_year follows the "country_year" pattern of the main dataset; years
* outside the list are left empty
gen country_year = "mexico_" + string(year) if inlist(year, 1960, 1970, 1990, 1995, 2000, 2010, 2015)

* Aligning identifiers with the main dataset
drop year
rename geolev2 geolevel2

* Industry-municipality cells absent after the reshape become 0
foreach wt of varlist perwt* {
	replace `wt' = 0 if `wt' == .
}

* Renaming to match with main dataset: the i-th industry code maps to the i-th
* industry name
local codes 0 10 20 30 40 50 60 70 80 90 100 111 112 113 114 120 130 999
local industries niunotinuniverse agriculturefishingandforestry miningandextraction manufacturing electricitygaswaterandwastemanag construction wholesaleandretailtrade hotelsandrestaurants transportationstorageandcommunic financialservicesandinsurance publicadministrationanddefense businessservicesandrealestate education healthandsocialwork otherservices privatehouseholdservices otherindustrynec unknown
local n_ind : word count `codes'
forvalues i = 1/`n_ind' {
	local code : word `i' of `codes'
	local name : word `i' of `industries'
	rename perwt`code' `name'
}

* Row total across all industries
egen rowtotal = rowtotal(`industries')

* Appending to main dataset and saving
append using `data_indgen_cities'
save `data_indgen_cities', replace

*-------------------------------------------------------------------------------
* Adding Turkey

* Loading the Turkey extract (opened relative to the current directory)
use ipums_turkey, clear

* Keeping only urban areas
* The three census-specific urban flags are added up and only rows whose total
* equals 1 are kept.
* NOTE(review): presumably each flag is filled only for its own census year and
* the value 1 codes "urban" - confirm against the IPUMS codebook.
egen urban = rowtotal(tr1985a_urban tr1990a_urban tr2000a_urban)
keep if urban == 1
drop urban tr1985a_urban tr1990a_urban tr2000a_urban

* Cleaning and collapsing by year, municipality and industry
drop country sample serial hhwt pernum age
sort year geolev2
collapse (sum) perwt, by(year geolev2 indgen)
drop if indgen == .

* Reshaping to fit main dataset format
reshape wide perwt, i(year geolev2) j(indgen)

* Generating variable country_year
gen country_year = ""
foreach y of numlist 1985 1990 2000{
	replace country_year = "turkey_`y'" if year == `y'
}

* Cleaning to fit main dataset
* The full variable name is spelled out (the abbreviation geolev only works
* while variable abbreviation is switched on)
drop year
rename geolev2 geolevel2

* Replacing missings with 0s
foreach v of varlist perwt*{
	replace `v' = 0 if `v' == .
}

* Renaming to match with main dataset
* Only the industry codes renamed below are expected in this extract.
rename perwt0		niunotinuniverse	
rename perwt10		agriculturefishingandforestry 
rename perwt20		miningandextraction
rename perwt30		manufacturing 
rename perwt40		electricitygaswaterandwastemanag 
rename perwt50		construction 
rename perwt60		wholesaleandretailtrade
rename perwt70		hotelsandrestaurants
rename perwt80		transportationstorageandcommunic
rename perwt90		financialservicesandinsurance 
rename perwt100		publicadministrationanddefense
rename perwt111		businessservicesandrealestate
rename perwt114		otherservices 
rename perwt999		unknown

* Generating rowtotal variable
egen rowtotal = rowtotal(niunotinuniverse agriculturefishingandforestry miningandextraction manufacturing electricitygaswaterandwastemanag construction wholesaleandretailtrade hotelsandrestaurants transportationstorageandcommunic financialservicesandinsurance publicadministrationanddefense businessservicesandrealestate otherservices unknown)

* Appending to main dataset
append using `data_indgen_cities' 

* Saving
save `data_indgen_cities', replace

*-------------------------------------------------------------------------------
* Adding Spain

* Loading the Spain extract (opened relative to the current directory)
use ipums_spain, clear

* Keeping only urban population - Based on World Urbanization Prospects The
* 2011 Revision, municipalities with over 10,000 people are considered urban
* NOTE(review): codes above 8 are taken to be the size classes over 10,000
* inhabitants - confirm against the IPUMS codebook.
* Missing values compare greater than any number in Stata, so they are excluded
* explicitly; otherwise records with unknown municipality size would be kept.
keep if es1991a_munsize > 8 & !missing(es1991a_munsize)
drop es1991a_munsize

* Cleaning and collapsing by year, municipality and industry
drop country sample serial hhwt pernum age
sort year geolev2
collapse (sum) perwt, by(year geolev2 indgen)
drop if indgen == .

* Reshaping to fit main dataset format
reshape wide perwt, i(year geolev2) j(indgen)

* Generating variable country_year
gen country_year = "spain_1991"

* Cleaning to fit main dataset
* The full variable name is spelled out (the abbreviation geolev only works
* while variable abbreviation is switched on)
drop year
rename geolev2 geolevel2

* Replacing missings with 0s
foreach v of varlist perwt*{
	replace `v' = 0 if `v' == .
}

* Renaming to match with main dataset
rename perwt0		niunotinuniverse	
rename perwt10		agriculturefishingandforestry 
rename perwt20		miningandextraction
rename perwt30		manufacturing 
rename perwt40		electricitygaswaterandwastemanag 
rename perwt50		construction 
rename perwt60		wholesaleandretailtrade
rename perwt70		hotelsandrestaurants
rename perwt80		transportationstorageandcommunic
rename perwt90		financialservicesandinsurance 
rename perwt100		publicadministrationanddefense
rename perwt111		businessservicesandrealestate
rename perwt112		education 
rename perwt113		healthandsocialwork 
rename perwt114		otherservices 
rename perwt120		privatehouseholdservices
* Codes 130 and 999 are intentionally not renamed for this extract:
// rename perwt130		otherindustrynec	
// rename perwt999		unknown

* Generating rowtotal variable
egen rowtotal = rowtotal(niunotinuniverse agriculturefishingandforestry miningandextraction manufacturing electricitygaswaterandwastemanag construction wholesaleandretailtrade hotelsandrestaurants transportationstorageandcommunic financialservicesandinsurance publicadministrationanddefense businessservicesandrealestate education healthandsocialwork otherservices privatehouseholdservices)

* Appending to main dataset
append using `data_indgen_cities'

* Saving
sort geolevel2
save `data_indgen_cities', replace

********************************************************************************
* Going back up three levels (from raw_datasets/Maps/indgen_maps to the working
* directory); forward slashes keep the path valid on every platform
cd "../../.."

********************************************************************************

********************************************************************************
**# Step 3: Assigning Employment by Industry to each City
********************************************************************************

* Joining both datasets: each FUA-subdistrict intersection is paired with every
* census record of the same subdistrict
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
joinby geolevel2 using `data_indgen_cities'

* Renaming industries: the i-th long name becomes the i-th short name
* (construction, unknown and rowtotal already carry their final names)
local long_names niunotinuniverse agriculturefishingandforestry miningandextraction manufacturing electricitygaswaterandwastemanag wholesaleandretailtrade hotelsandrestaurants transportationstorageandcommunic financialservicesandinsurance publicadministrationanddefense businessservicesandrealestate education healthandsocialwork otherservices privatehouseholdservices otherindustrynec servicesnotspecified responsesuppressed
local short_names niu agri mining mfg utilities trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv other_industry serv_notsp resp_supressed
rename (`long_names') (`short_names')

* Applying shares: each count is scaled by the share of the subdistrict area
* that falls inside the FUA
local all_counts niu agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown rowtotal other_industry serv_notsp resp_supressed
foreach ind of varlist `all_counts' {
	gen _`ind' = `ind'*share
}

* Fixing country names so that both country variables use the same spelling
replace cntry_na_1 = "Costa Rica" if cntry_na_1 == "CostaRica"
replace cntry_na_1 = "Dominican Republic" if cntry_na_1 == "DominicanRepublic"
replace cntry_na_1 = "El Salvador" if cntry_na_1 == "ElSalvador"
replace cntry_na_1 = "Kyrghzstan" if cntry_na_1 == "Kyrgyzstan"
replace cntry_na_1 = "Papua New Guinea" if cntry_na_1 == "PapuaNewGuinea"
replace cntry_na_1 = "Sierra Leone" if cntry_na_1 == "SierraLeone"
replace cntry_na_1 = "South Sudan" if cntry_na_1 == "SouthSudan"

* Keeping only intersections where subdistrict and city belong to the same
* country
keep if cntry_name == cntry_na_1

* Collapsing by city and census
collapse (sum) _* (mean) fua_p_2015 (first) efua_name, by(efua_id country_year)

* Industry shares: the denominator is the total minus the not-in-universe count
local share_inds agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown other_industry serv_notsp resp_supressed
foreach ind of local share_inds {
	gen share_`ind' = _`ind'/(_rowtotal-_niu)
}

save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
**# Step 4: Adding ADM1, ADM3 and other countries datasets
********************************************************************************

* Each secondary do-file is run first and the dataset(s) listed right after it
* are appended to the main file.
* NOTE(review): presumably each do-file is what creates the appended
* dataset(s) - confirm in codes/secondary_dos.
* All paths use forward slashes so the script runs on every platform.

* ADM1
do "codes/secondary_dos/maps_and_cities_adm1_data_creator.do"
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
append using "processed_datasets/dataset_maps_and_cities_adm1"
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

* ADM3
do "codes/secondary_dos/maps_and_cities_adm3_data_creator.do"
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
append using "processed_datasets/dataset_maps_and_cities_adm3"
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

* Canada, Philippines, US and Italy
do "codes/secondary_dos/maps_and_cities_indcountries_data_creator.do"
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
append using "processed_datasets/maps_and_cities_canada"
append using "processed_datasets/maps_and_cities_philippines"
append using "processed_datasets/maps_and_cities_usa"
append using "processed_datasets/maps_and_cities_italy"
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
* Fixing duplicated Guangzhou names
* Each FUA listed below is given its own name, identified by efua_id.

replace efua_name  = "Shenzhen" if efua_id == 9725
* (a stray trailing apostrophe was removed from this name)
replace efua_name  = "Bao'An District" if efua_id == 9724
replace efua_name  = "Shajing Residential District" if efua_id == 9721
replace efua_name  = "Dongguan" if efua_id == 9718
replace efua_name  = "Humen Town" if efua_id == 9719
replace efua_name  = "Guangzhou" if efua_id == 9711
replace efua_name  = "Panyu District" if efua_id == 9716
replace efua_name  = "Jiangmen" if efua_id == 9715

save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
**# Step 4b: Obtaining Urban Center populations
********************************************************************************

* Importing areas of UCs
* (paths are quoted and use forward slashes so they work on every platform)
import delimited "raw_datasets/Maps/area_uc.csv", encoding(UTF-8) clear
keep id_hdc_g0 area_uc
tempfile uc_pop
save `uc_pop'

* Importing intersection of UCs and FUAs
import delimited "raw_datasets/Maps/intersection_fua_uc.csv", encoding(UTF-8) clear
keep efua_id uc_ids efua_name cntry_name id_hdc_g0 ctr_mn_nm uc_nm_mn p75 p90 p00 p15 area_inter uc_p_2015

* Merging with UC area file; only intersections with a known UC area are kept
merge m:1 id_hdc_g0 using `uc_pop', keep(match) nogenerate

* Calculating shares of area of intersection UC-FUA on UC area
gen share_uc = area_inter/area_uc
sort efua_id id_hdc_g0

* Generating adjusted population based on new shares: each UC population
* figure is scaled by the share of the UC area that lies inside the FUA
gen p75_adj = p75 * share_uc
gen p90_adj = p90 * share_uc
gen p00_adj = p00 * share_uc
gen p15_adj = p15 * share_uc

tempfile fua_uc_pop
save `fua_uc_pop', replace

* Calculating FUA-UC population option A: Using only largest UC within FUA
use `fua_uc_pop', clear

* Exactly one row is kept per FUA: the UC with the largest adjusted 2015
* population. Sorting on the negated value puts the largest first and missing
* values last; id_hdc_g0 breaks ties deterministically. Keeping a single row
* guarantees that efua_id is unique in the saved file, which the later m:1
* merge on efua_id requires (ties would otherwise leave several rows per FUA).
gen double neg_p15_adj = -p15_adj
bysort efua_id (neg_p15_adj id_hdc_g0): keep if _n == 1
keep efua_id p75_adj p90_adj p00_adj p15_adj

rename p75_adj p75_adj_max
rename p90_adj p90_adj_max
rename p00_adj p00_adj_max
rename p15_adj p15_adj_max

la var p75_adj_max "UC pop 75 using max. UC within FUA"
la var p90_adj_max "UC pop 90 using max. UC within FUA"
la var p00_adj_max "UC pop 00 using max. UC within FUA"
la var p15_adj_max "UC pop 15 using max. UC within FUA"

save "processed_datasets/fua_uc_pop_max", replace

* Calculating FUA-UC population option B: Using all UCs within FUA
use `fua_uc_pop', clear

* Area-adjusted populations of all UCs intersecting the FUA are added up
collapse (sum) p75_adj p90_adj p00_adj p15_adj, by(efua_id)

rename p75_adj p75_adj_wgt
rename p90_adj p90_adj_wgt
rename p00_adj p00_adj_wgt
rename p15_adj p15_adj_wgt

la var p75_adj_wgt "UC pop 75 using weighted pop. of all UCs within FUA"
la var p90_adj_wgt "UC pop 90 using weighted pop. of all UCs within FUA"
la var p00_adj_wgt "UC pop 00 using weighted pop. of all UCs within FUA"
la var p15_adj_wgt "UC pop 15 using weighted pop. of all UCs within FUA"

* Forward slash keeps the path valid on every platform
save "processed_datasets/fua_uc_pop_wgt", replace

* Calculating FUA-UC population option C: Using 2015 UC population measures in UC and FUA datasets
use `fua_uc_pop', clear

* Unadjusted populations are summed per FUA and uc_p_2015 is averaged; collapse
* keeps only these variables, so no separate drop is needed beforehand
collapse (sum) p75 p90 p00 p15 (mean) uc_p_2015, by(efua_id)

* Ratio of uc_p_2015 to the summed 2015 population, applied to every year
gen share = uc_p_2015/p15

gen p75_adj_sh = p75*share
gen p90_adj_sh = p90*share
gen p00_adj_sh = p00*share
gen p15_adj_sh = p15*share

la var p75_adj_sh "UC pop 75 using FUA and UC pop. measures for 2015 as shares"
la var p90_adj_sh "UC pop 90 using FUA and UC pop. measures for 2015 as shares"
la var p00_adj_sh "UC pop 00 using FUA and UC pop. measures for 2015 as shares"
la var p15_adj_sh "UC pop 15 using FUA and UC pop. measures for 2015 as shares"

keep efua_id p75_adj_sh p90_adj_sh p00_adj_sh p15_adj_sh

* Forward slash keeps the path valid on every platform
save "processed_datasets/fua_uc_pop_sh", replace

* Merging to main dataset
* The three UC population files are merged one after another on efua_id. After
* each merge only matched observations are kept, so FUAs missing from any of
* the three files drop out of the main dataset.
* All paths use forward slashes so the script runs on every platform.

use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear

merge m:1 efua_id using "processed_datasets/fua_uc_pop_max", keep(match) nogenerate

merge m:1 efua_id using "processed_datasets/fua_uc_pop_wgt", keep(match) nogenerate

merge m:1 efua_id using "processed_datasets/fua_uc_pop_sh", keep(match) nogenerate

save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
**# Step 5: Merging with city coordinates
********************************************************************************

* Importing city coordinates (forward slashes keep the path valid on every
* platform); only the coordinates, the FUA id and the country name are kept
import delimited "raw_datasets/Maps/cities_coordinates.csv", encoding(UTF-8) clear
keep x y efua_id cntry_name
tempfile cities_coordinates
save `cities_coordinates', replace

* Attaching coordinates to the main dataset; FUAs without coordinates are
* dropped
use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
merge m:1 efua_id using `cities_coordinates', keep(match) nogenerate

save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
**# Step 6: Identify capital cities
********************************************************************************

* capital takes the value:
*   0 : not a capital
*   1 : Main Capital
*   2 : Alternative Capital

gen capital = 0

* efua_id of every FUA flagged as main capital
local main_capitals ///
	4215 1992 209 5465 976 2295 2072 7704 4048 1674 ///
	2569 3981 888 5821 2940 2162 550 1302 1301 7466 ///
	4897 4686 2709 1507 567 98 446 1322 586 5006 ///
	118 1525 594 6502 5621 740 875 465 2327 1338 ///
	156 1890 3226 2797 1894 1245 906 7068 4040 1986 ///
	2702 2114 799 666 566 310 3700 3596 153 3343 ///
	3650 3147 2207 2845 1899 3222 2317 57 273 164 ///
	224 121 359 3881 101 1091 415

* efua_id of every FUA flagged as alternative capital
local alt_capitals 1657 1165 189 2060

foreach id of local main_capitals {
	replace capital = 1 if efua_id == `id'
}

foreach id of local alt_capitals {
	replace capital = 2 if efua_id == `id'
}

********************************************************************************
**# Step 7: Estimate and rename variables 
********************************************************************************

* FUA population in 2015: final name and natural log
rename fua_p_2015 fua_pop_2015
gen log_fua_pop_2015 = log(fua_pop_2015)

* Year: first four consecutive digits of country_year, stored as a number
gen year = regexs(0) if regexm(country_year, "[0-9][0-9][0-9][0-9]")
destring year, replace

* Country: first run of letters of country_year, with the first letter in
* upper case
gen country = proper(regexs(0)) if regexm(country_year, "([a-zA-Z]+)")

* Dropping South Sudan
drop if country_year == "southsudan_2008"

* Renaming Kyrgyz Republic for consistency with maps_and_cities_classwk dataset
replace country = "Kyrgyz" if country == "Kyrgyzrepublic"
foreach yr in 2009 1999 {
	replace country_year = "kyrgyz_`yr'" if country_year == "kyrgyzrepublic_`yr'"
}

save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace

********************************************************************************
**# Step 7b: Matching missing census categories
/*******************************************************************************
In some censuses a particular industry may not be present in the data. 
When appending all country-year data files, some categories will appear as 
missing for some countries if the category was not available for their 
census-year. When collapsing the dataset, all these missing values disappear 
and are replaced by 0s. It's not clear then, if a 0 is indeed a 0 or a 
hidden missing. We create a set of dummies for each industry category that take 
a value 1 if the industry was a valid option in each country-year census. 
These variables can be used as an aid to determine whether 0s represent 
real 0s or `hidden missings'.
*******************************************************************************/

* List of countries names as downloaded from IPUMS data extract matched to
* list of names used in the current dataset
* (paths are quoted and use forward slashes so they work on every platform)
import excel "raw_datasets/Maps/IPUMS_cats_census.xlsx", sheet("countries_list") firstrow clear
tempfile fixname
save `fixname', replace

* List of missing categories from the census
import excel "raw_datasets/Maps/IPUMS_cats_census.xlsx", sheet("INDGEN") firstrow clear

* Matching countries names to the ones used in the current dataset
merge m:1 country_ipums using `fixname', keep(match) nogenerate

* Fixing formatting of years (in order to destring the variable)
* year is a string at this point: the rows dropped below are removed, and the
* remaining Italian "Q1" labels for 2012-2019 are reduced to the plain year
drop if country == "Italy" & year == "2011Q1"
drop if country == "Spain" & year != "1991"
drop if country == "Unitedkingdom"

foreach y of numlist 2012/2019{
	replace year = "`y'" if year == "`y'Q1" & country == "Italy"
}

destring year, replace

* Renaming variables: one q_ indicator per industry category, using the same
* short names as the main dataset
rename NIUnotinuniverse 				q_niu
rename Agriculturefishingandforest 		q_agri
rename Miningandextraction 				q_mining
rename Manufacturing 					q_mfg
rename Electricitygaswaterandwast 		q_utilities
rename Construction 					q_construction
rename Wholesaleandretailtrade 			q_trade
rename Hotelsandrestaurants 			q_hospitality
rename Transportationstorageandcom 		q_transport
rename Financialservicesandinsurance 	q_fin_insu
rename Publicadministrationanddefens 	q_govmt
rename Servicesnotspecified 			q_serv_notsp
rename Businessservicesandrealestat 	q_bussserv_rs
rename Education 						q_educ
rename Healthandsocialwork 				q_health
rename Otherservices 					q_other_serv
rename Privatehouseholdservices 		q_house_serv
rename Otherindustrynec 				q_other_industry
rename Responsesuppressed 				q_resp_supressed
rename Unknown							q_unknown

* Replacing Xs for 1s and middle dots for 0s, then converting to numeric
foreach v of varlist q_*{
	replace `v' = "1" if `v' == "X"
	replace `v' = "0" if `v' == "·"
	destring `v', replace
}

* Saving (the tempfile is reused: from here on it holds the category
* indicators by country and year)
drop country_ipums
save `fixname', replace

*-------------------------------------------------------------------------------
* Matching to main dataset
* Only country-year pairs present in both the main dataset and the category
* list are kept; no _merge variable is left behind

use "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", clear
merge m:1 country year using `fixname', keep(match) nogenerate

********************************************************************************
**# Step 7c: Labeling and Cleaning
********************************************************************************

* Variables that are not part of the final dataset
drop cntry_name q_niu

* City-level identifiers and characteristics
label variable efua_id "ID of FUA"
label variable country_year "country and year of census"
label variable fua_pop_2015 "FUA population in 2015"
label variable log_fua_pop_2015 "Log of FUA population in 2015"
label variable x "X coordinate of FUA"
label variable y "Y coordinate of FUA"
label variable capital "1 if FUA is main capital, 2 if it's second/alt capital"

* Counts that have no share or q_ counterpart
label variable _niu "(Weighted) number of people: NIU (not in universe)"
label variable _rowtotal "(Weighted) number of people: Total"

* Industry descriptions shared by the count and share labels
local d_agri "Agriculture, fishing, and forestry"
local d_mining "Mining and extraction"
local d_mfg "Manufacturing"
local d_utilities "Elec., gas, water and waste mngmnt"
local d_construction "Construction"
local d_trade "Wholesale and retail trade"
local d_hospitality "Hotels and restaurants"
local d_transport "Transp., storage, and com."
local d_fin_insu "Financial services and insurance"
local d_govmt "Public administration and defense"
local d_bussserv_rs "Business services and real estate"
local d_educ "Education"
local d_health "Health and social work"
local d_other_serv "Other services"
local d_house_serv "Private household services"
local d_unknown "Unknown"
local d_other_industry "Other industry, n.e.c."
local d_serv_notsp "Services, not specified"
local d_resp_supressed "Response suppressed"

* For every industry: label the weighted count, the share and the indicator
* of whether the category was available in the census
local industries agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown other_industry serv_notsp resp_supressed
foreach ind of local industries {
	label variable _`ind' "(Weighted) number of people: `d_`ind''"
	label variable share_`ind' "Share of people: `d_`ind''"
	label variable q_`ind' "Value of 1 if census of country-year asks about `ind'"
}

********************************************************************************
**# Step 8: Saving
********************************************************************************

* Final dataset, ordered by city and census
sort efua_id country_year
save "processed_datasets/dataset_maps_and_cities_01.22.2022.dta", replace
